#ifndef NPY_SIMD
    #error "Not a standalone header"
#endif

#ifndef _NPY_SIMD_AVX2_MISC_H
#define _NPY_SIMD_AVX2_MISC_H

// Vector with all lanes set to zero.
// Every integer lane type aliases _mm256_setzero_si256, while f32 and f64
// alias _mm256_setzero_ps and _mm256_setzero_pd. These are plain name
// aliases, so they are invoked like the intrinsic itself, e.g. npyv_zero_u8().
#define npyv_zero_u8  _mm256_setzero_si256
#define npyv_zero_s8  _mm256_setzero_si256
#define npyv_zero_u16 _mm256_setzero_si256
#define npyv_zero_s16 _mm256_setzero_si256
#define npyv_zero_u32 _mm256_setzero_si256
#define npyv_zero_s32 _mm256_setzero_si256
#define npyv_zero_u64 _mm256_setzero_si256
#define npyv_zero_s64 _mm256_setzero_si256
#define npyv_zero_f32 _mm256_setzero_ps
#define npyv_zero_f64 _mm256_setzero_pd

// Vector with a specific value set to all lanes.
//
// VAL is parenthesized before it is cast: a cast binds tighter than any
// binary operator, so without the parentheses an expression argument such as
// `x >> 1` would have only `x` converted and the shift applied afterwards.
// The unsigned variants cast to the signed type expected by the intrinsic of
// the same lane width.
#define npyv_setall_u8(VAL)  _mm256_set1_epi8((char)(VAL))
#define npyv_setall_s8(VAL)  _mm256_set1_epi8((char)(VAL))
#define npyv_setall_u16(VAL) _mm256_set1_epi16((short)(VAL))
#define npyv_setall_s16(VAL) _mm256_set1_epi16((short)(VAL))
#define npyv_setall_u32(VAL) _mm256_set1_epi32((int)(VAL))
#define npyv_setall_s32(VAL) _mm256_set1_epi32(VAL)
#define npyv_setall_u64(VAL) _mm256_set1_epi64x((npy_int64)(VAL))
#define npyv_setall_s64(VAL) _mm256_set1_epi64x(VAL)
#define npyv_setall_f32(VAL) _mm256_set1_ps(VAL)
#define npyv_setall_f64(VAL) _mm256_set1_pd(VAL)

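/*
 * vector with specific values set to each lane and
 * a specific fill value set to all remaining lanes
 *
 * The wrappers below are real functions on purpose: the arguments generated
 * by NPYV__SET_FILL_* would not be expanded if they were passed directly to
 * _mm256_setr_* and those intrinsics happened to be defined as macros.
*/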
// Function wrapper around _mm256_setr_epi8: forwards its 32 byte arguments,
// in the order given (i0 first, i31 last), and returns the resulting vector.
NPY_FINLINE __m256i npyv__setr_epi8(
    char i0,  char i1,  char i2,  char i3,  char i4,  char i5,  char i6,  char i7,
    char i8,  char i9,  char i10, char i11, char i12, char i13, char i14, char i15,
    char i16, char i17, char i18, char i19, char i20, char i21, char i22, char i23,
    char i24, char i25, char i26, char i27, char i28, char i29, char i30, char i31)
{
    const __m256i lanes = _mm256_setr_epi8(
        i0,  i1,  i2,  i3,  i4,  i5,  i6,  i7,
        i8,  i9,  i10, i11, i12, i13, i14, i15,
        i16, i17, i18, i19, i20, i21, i22, i23,
        i24, i25, i26, i27, i28, i29, i30, i31
    );
    return lanes;
}
// Function wrapper around _mm256_setr_epi16: forwards its 16 short arguments,
// in the order given (i0 first, i15 last), and returns the resulting vector.
NPY_FINLINE __m256i npyv__setr_epi16(
    short i0,  short i1,  short i2,  short i3,  short i4,  short i5,  short i6,  short i7,
    short i8,  short i9,  short i10, short i11, short i12, short i13, short i14, short i15)
{
    const __m256i lanes = _mm256_setr_epi16(
        i0, i1, i2,  i3,  i4,  i5,  i6,  i7,
        i8, i9, i10, i11, i12, i13, i14, i15
    );
    return lanes;
}
// Function wrapper around _mm256_setr_epi32: forwards its 8 int arguments,
// in the order given (i0 first, i7 last), and returns the resulting vector.
NPY_FINLINE __m256i npyv__setr_epi32(int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7)
{
    const __m256i lanes = _mm256_setr_epi32(
        i0, i1, i2, i3,
        i4, i5, i6, i7
    );
    return lanes;
}
// Function wrapper around _mm256_setr_epi64x: forwards its 4 64-bit integer
// arguments, in the order given (i0 first, i3 last), and returns the
// resulting vector.
NPY_FINLINE __m256i npyv__setr_epi64(npy_int64 i0, npy_int64 i1, npy_int64 i2, npy_int64 i3)
{
    const __m256i lanes = _mm256_setr_epi64x(i0, i1, i2, i3);
    return lanes;
}

// Function wrapper around _mm256_setr_ps: forwards its 8 float arguments,
// in the order given (i0 first, i7 last), and returns the resulting vector.
NPY_FINLINE __m256 npyv__setr_ps(float i0, float i1, float i2, float i3, float i4, float i5,
                                 float i6, float i7)
{
    const __m256 lanes = _mm256_setr_ps(
        i0, i1, i2, i3,
        i4, i5, i6, i7
    );
    return lanes;
}
// Function wrapper around _mm256_setr_pd: forwards its 4 double arguments,
// in the order given (i0 first, i3 last), and returns the resulting vector.
NPY_FINLINE __m256d npyv__setr_pd(double i0, double i1, double i2, double i3)
{
    const __m256d lanes = _mm256_setr_pd(i0, i1, i2, i3);
    return lanes;
}
// npyv_setf_<sfx>(FILL, ...): build a vector from the listed lane values.
// Each variant hands the output of NPYV__SET_FILL_<N> to the npyv__setr_*
// wrapper of the same lane width, where <N> is 32, 16, 8 or 4 and the first
// argument given to NPYV__SET_FILL_<N> is a C type name.
// NOTE(review): NPYV__SET_FILL_* is defined outside this header; presumably
// it casts every value to that type and pads the missing lanes with FILL -
// confirm against its definition.
#define npyv_setf_u8(FILL, ...)  npyv__setr_epi8(NPYV__SET_FILL_32(char, FILL, __VA_ARGS__))
#define npyv_setf_s8(FILL, ...)  npyv__setr_epi8(NPYV__SET_FILL_32(char, FILL, __VA_ARGS__))
#define npyv_setf_u16(FILL, ...) npyv__setr_epi16(NPYV__SET_FILL_16(short, FILL, __VA_ARGS__))
#define npyv_setf_s16(FILL, ...) npyv__setr_epi16(NPYV__SET_FILL_16(short, FILL, __VA_ARGS__))
#define npyv_setf_u32(FILL, ...) npyv__setr_epi32(NPYV__SET_FILL_8(int, FILL, __VA_ARGS__))
#define npyv_setf_s32(FILL, ...) npyv__setr_epi32(NPYV__SET_FILL_8(int, FILL, __VA_ARGS__))
#define npyv_setf_u64(FILL, ...) npyv__setr_epi64(NPYV__SET_FILL_4(npy_uint64, FILL, __VA_ARGS__))
#define npyv_setf_s64(FILL, ...) npyv__setr_epi64(NPYV__SET_FILL_4(npy_int64, FILL, __VA_ARGS__))
#define npyv_setf_f32(FILL, ...) npyv__setr_ps(NPYV__SET_FILL_8(float, FILL, __VA_ARGS__))
#define npyv_setf_f64(FILL, ...) npyv__setr_pd(NPYV__SET_FILL_4(double, FILL, __VA_ARGS__))

// Vector with specific values set to each lane and
// zero set to all remaining lanes.
// Each variant forwards to the matching npyv_setf_* macro with FILL = 0.
#define npyv_set_u8(...)  npyv_setf_u8(0,  __VA_ARGS__)
#define npyv_set_s8(...)  npyv_setf_s8(0,  __VA_ARGS__)
#define npyv_set_u16(...) npyv_setf_u16(0, __VA_ARGS__)
#define npyv_set_s16(...) npyv_setf_s16(0, __VA_ARGS__)
#define npyv_set_u32(...) npyv_setf_u32(0, __VA_ARGS__)
#define npyv_set_s32(...) npyv_setf_s32(0, __VA_ARGS__)
#define npyv_set_u64(...) npyv_setf_u64(0, __VA_ARGS__)
#define npyv_set_s64(...) npyv_setf_s64(0, __VA_ARGS__)
#define npyv_set_f32(...) npyv_setf_f32(0, __VA_ARGS__)
#define npyv_set_f64(...) npyv_setf_f64(0, __VA_ARGS__)

// Per lane select
//
// npyv_select_*(MASK, A, B) hands (B, A, MASK) to the blendv intrinsic, i.e.
// A and B are swapped relative to the macro's own argument order.
// NOTE(review): per Intel's documentation blendv takes its second operand
// where the mask's top bit is set, so the result should be A where MASK is
// set and B elsewhere - confirm.
//
// All integer variants alias the byte-wise u8 form.
// NOTE(review): for lanes wider than 8 bits this presumably relies on MASK
// being all-ones or all-zeros across each whole lane - confirm against the
// operations that produce the masks.
//
// The f32/f64 variants reinterpret the integer MASK as a float/double vector
// before blending.
#define npyv_select_u8(MASK, A, B)  _mm256_blendv_epi8(B, A, MASK)
#define npyv_select_s8  npyv_select_u8
#define npyv_select_u16 npyv_select_u8
#define npyv_select_s16 npyv_select_u8
#define npyv_select_u32 npyv_select_u8
#define npyv_select_s32 npyv_select_u8
#define npyv_select_u64 npyv_select_u8
#define npyv_select_s64 npyv_select_u8
#define npyv_select_f32(MASK, A, B) _mm256_blendv_ps(B, A, _mm256_castsi256_ps(MASK))
#define npyv_select_f64(MASK, A, B) _mm256_blendv_pd(B, A, _mm256_castsi256_pd(MASK))

// Reinterpret
// Naming: npyv_reinterpret_<to>_<from>(X).
//
// To u8: integer sources expand to X unchanged; f32/f64 sources alias the
// _mm256_castps_si256 / _mm256_castpd_si256 intrinsics.
#define npyv_reinterpret_u8_u8(X)  X
#define npyv_reinterpret_u8_s8(X)  X
#define npyv_reinterpret_u8_u16(X) X
#define npyv_reinterpret_u8_s16(X) X
#define npyv_reinterpret_u8_u32(X) X
#define npyv_reinterpret_u8_s32(X) X
#define npyv_reinterpret_u8_u64(X) X
#define npyv_reinterpret_u8_s64(X) X
#define npyv_reinterpret_u8_f32 _mm256_castps_si256
#define npyv_reinterpret_u8_f64 _mm256_castpd_si256

// Reinterpret to s8: integer sources expand to X unchanged; f32/f64 sources
// alias the _mm256_castps_si256 / _mm256_castpd_si256 intrinsics.
#define npyv_reinterpret_s8_s8(X)  X
#define npyv_reinterpret_s8_u8(X)  X
#define npyv_reinterpret_s8_u16(X) X
#define npyv_reinterpret_s8_s16(X) X
#define npyv_reinterpret_s8_u32(X) X
#define npyv_reinterpret_s8_s32(X) X
#define npyv_reinterpret_s8_u64(X) X
#define npyv_reinterpret_s8_s64(X) X
#define npyv_reinterpret_s8_f32 _mm256_castps_si256
#define npyv_reinterpret_s8_f64 _mm256_castpd_si256

// Reinterpret to u16: integer sources expand to X unchanged; f32/f64 sources
// alias the _mm256_castps_si256 / _mm256_castpd_si256 intrinsics.
#define npyv_reinterpret_u16_u16(X) X
#define npyv_reinterpret_u16_u8(X)  X
#define npyv_reinterpret_u16_s8(X)  X
#define npyv_reinterpret_u16_s16(X) X
#define npyv_reinterpret_u16_u32(X) X
#define npyv_reinterpret_u16_s32(X) X
#define npyv_reinterpret_u16_u64(X) X
#define npyv_reinterpret_u16_s64(X) X
#define npyv_reinterpret_u16_f32 _mm256_castps_si256
#define npyv_reinterpret_u16_f64 _mm256_castpd_si256

// Reinterpret to s16: integer sources expand to X unchanged; f32/f64 sources
// alias the _mm256_castps_si256 / _mm256_castpd_si256 intrinsics.
#define npyv_reinterpret_s16_s16(X) X
#define npyv_reinterpret_s16_u8(X)  X
#define npyv_reinterpret_s16_s8(X)  X
#define npyv_reinterpret_s16_u16(X) X
#define npyv_reinterpret_s16_u32(X) X
#define npyv_reinterpret_s16_s32(X) X
#define npyv_reinterpret_s16_u64(X) X
#define npyv_reinterpret_s16_s64(X) X
#define npyv_reinterpret_s16_f32 _mm256_castps_si256
#define npyv_reinterpret_s16_f64 _mm256_castpd_si256

// Reinterpret to u32: integer sources expand to X unchanged; f32/f64 sources
// alias the _mm256_castps_si256 / _mm256_castpd_si256 intrinsics.
#define npyv_reinterpret_u32_u32(X) X
#define npyv_reinterpret_u32_u8(X)  X
#define npyv_reinterpret_u32_s8(X)  X
#define npyv_reinterpret_u32_u16(X) X
#define npyv_reinterpret_u32_s16(X) X
#define npyv_reinterpret_u32_s32(X) X
#define npyv_reinterpret_u32_u64(X) X
#define npyv_reinterpret_u32_s64(X) X
#define npyv_reinterpret_u32_f32 _mm256_castps_si256
#define npyv_reinterpret_u32_f64 _mm256_castpd_si256

// Reinterpret to s32: integer sources expand to X unchanged; f32/f64 sources
// alias the _mm256_castps_si256 / _mm256_castpd_si256 intrinsics.
#define npyv_reinterpret_s32_s32(X) X
#define npyv_reinterpret_s32_u8(X)  X
#define npyv_reinterpret_s32_s8(X)  X
#define npyv_reinterpret_s32_u16(X) X
#define npyv_reinterpret_s32_s16(X) X
#define npyv_reinterpret_s32_u32(X) X
#define npyv_reinterpret_s32_u64(X) X
#define npyv_reinterpret_s32_s64(X) X
#define npyv_reinterpret_s32_f32 _mm256_castps_si256
#define npyv_reinterpret_s32_f64 _mm256_castpd_si256

// Reinterpret to u64: integer sources expand to X unchanged; f32/f64 sources
// alias the _mm256_castps_si256 / _mm256_castpd_si256 intrinsics.
#define npyv_reinterpret_u64_u64(X) X
#define npyv_reinterpret_u64_u8(X)  X
#define npyv_reinterpret_u64_s8(X)  X
#define npyv_reinterpret_u64_u16(X) X
#define npyv_reinterpret_u64_s16(X) X
#define npyv_reinterpret_u64_u32(X) X
#define npyv_reinterpret_u64_s32(X) X
#define npyv_reinterpret_u64_s64(X) X
#define npyv_reinterpret_u64_f32 _mm256_castps_si256
#define npyv_reinterpret_u64_f64 _mm256_castpd_si256

// Reinterpret to s64: integer sources expand to X unchanged; f32/f64 sources
// alias the _mm256_castps_si256 / _mm256_castpd_si256 intrinsics.
#define npyv_reinterpret_s64_s64(X) X
#define npyv_reinterpret_s64_u8(X)  X
#define npyv_reinterpret_s64_s8(X)  X
#define npyv_reinterpret_s64_u16(X) X
#define npyv_reinterpret_s64_s16(X) X
#define npyv_reinterpret_s64_u32(X) X
#define npyv_reinterpret_s64_s32(X) X
#define npyv_reinterpret_s64_u64(X) X
#define npyv_reinterpret_s64_f32 _mm256_castps_si256
#define npyv_reinterpret_s64_f64 _mm256_castpd_si256

// Reinterpret to f32: an f32 source expands to X unchanged; every integer
// source aliases _mm256_castsi256_ps and an f64 source aliases
// _mm256_castpd_ps.
#define npyv_reinterpret_f32_f32(X) X
#define npyv_reinterpret_f32_u8  _mm256_castsi256_ps
#define npyv_reinterpret_f32_s8  _mm256_castsi256_ps
#define npyv_reinterpret_f32_u16 _mm256_castsi256_ps
#define npyv_reinterpret_f32_s16 _mm256_castsi256_ps
#define npyv_reinterpret_f32_u32 _mm256_castsi256_ps
#define npyv_reinterpret_f32_s32 _mm256_castsi256_ps
#define npyv_reinterpret_f32_u64 _mm256_castsi256_ps
#define npyv_reinterpret_f32_s64 _mm256_castsi256_ps
#define npyv_reinterpret_f32_f64 _mm256_castpd_ps

// Reinterpret to f64: an f64 source expands to X unchanged; every integer
// source aliases _mm256_castsi256_pd and an f32 source aliases
// _mm256_castps_pd.
#define npyv_reinterpret_f64_f64(X) X
#define npyv_reinterpret_f64_u8  _mm256_castsi256_pd
#define npyv_reinterpret_f64_s8  _mm256_castsi256_pd
#define npyv_reinterpret_f64_u16 _mm256_castsi256_pd
#define npyv_reinterpret_f64_s16 _mm256_castsi256_pd
#define npyv_reinterpret_f64_u32 _mm256_castsi256_pd
#define npyv_reinterpret_f64_s32 _mm256_castsi256_pd
#define npyv_reinterpret_f64_u64 _mm256_castsi256_pd
#define npyv_reinterpret_f64_s64 _mm256_castsi256_pd
#define npyv_reinterpret_f64_f32 _mm256_castps_pd

// npyv_cleanup is a plain alias of _mm256_zeroall and is invoked like it,
// i.e. npyv_cleanup().
// NOTE(review): per Intel's documentation _mm256_zeroall zeroes every YMM
// register; presumably this is meant to be called once a block of AVX code
// is done, to avoid AVX<->SSE transition penalties - confirm with callers.
#define npyv_cleanup _mm256_zeroall

#endif // _NPY_SIMD_AVX2_MISC_H
